import os
from tqdm import tqdm

# Build the speechocean train/dev manifests.
#
# Every file under ``rootPath`` is read as a transcript script whose lines
# hold three tab-separated fields (wav id, text, second annotation field).
# For each record a line ``<wav path>\t<third field>\t<cleaned text>\n`` is
# appended to the dev manifest (every 50th record, starting with the first)
# or to the train manifest (all other records).
#
# NOTE(review): files are opened with the platform default encoding, as in
# the original script; if the transcripts are UTF-8 and this runs on a
# platform with a different default, pass ``encoding=`` explicitly.
# NOTE(review): ``os.listdir`` returns entries in arbitrary order, so both
# the dev/train assignment and the wav sub-directory picked below depend on
# filesystem order.
rootPath = "data/speechocean/Script"
wavRootPath = "data/speechocean/Wav"

listdir = os.listdir(rootPath)
WavListdir = os.listdir(wavRootPath)
print(listdir)

# Single-uppercase-letter markup tags (<A>, </A>, ..., <Z>, </Z>) removed
# from the text field. Built once instead of once per transcript line; the
# order (<X> before </X>, A through Z) matches the original replace order.
_TAGS = [
    template.format(chr(zm))
    for zm in range(65, 91)
    for template in ("<{}>", "</{}>")
]

# Only the first entry of the wav root is used to build audio paths.
_wav_dirs = WavListdir[:1]

# Running record counter that drives the 1-in-50 dev split.
i = 0

# Open both manifests once (still in append mode) rather than reopening a
# file for every record; the ``with`` statement closes them on exit.
with open("../data/speechocean_dev.txt", 'a') as devf, \
        open("../data/speechocean_train.txt", 'a') as trainf:
    for item in listdir:
        script_path = rootPath + "/" + item
        with open(script_path, 'r') as f:
            data = f.readlines()
        for line_no, line in enumerate(tqdm(data), start=1):
            record = line.strip()
            if not record:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            fields = record.split("\t")
            if len(fields) != 3:
                # Same exception type as the original unpacking failure,
                # but with enough context to locate the offending line.
                raise ValueError(
                    "{}:{}: expected 3 tab-separated fields, got {}".format(
                        script_path, line_no, len(fields)))
            wav_path, han, pin = fields
            for tag in _TAGS:
                han = han.replace(tag, '')
            for wavPath in _wav_dirs:
                # Layout: <wav root>/<sub dir>/<first 4 chars of script
                # file name>/<wav id>.wav
                wav_path1 = wavRootPath + "/" + wavPath + "/" + item[:4] + "/" + wav_path + ".wav"
                # tqdm.write prints to stdout without garbling the bar.
                tqdm.write(wav_path1)
                lineStr = wav_path1 + "\t" + pin + "\t" + han + "\n"
                if i % 50 == 0:
                    devf.write(lineStr)
                else:
                    trainf.write(lineStr)
                i += 1
